library(tidyverse)
library(estimatr)
library(ggthemes)
library(ggrepel)
library(cowplot)
library(xtable)

### SET SEED and LOAD FILES ----

# Fix the RNG seed before anything else so the random sampling later in the
# script is reproducible.
set.seed(12345678)

#### Load data, convert attributes to factors, rename and recode ----
# Everything is done in one pipeline; the mutate() steps run in order, so
# `selected` is computed from the original "A"/"B" profile codes before
# `profile` itself is overwritten with 1/2.
df <- read.csv("b_data/3_long_data_for_analysis.csv") %>%
  # The nine profile attributes are treated as factors.
  mutate(across(
    c(Faces, Background, Identifies, Language, Education,
      Profession, Religion, Political, Spouse),
    as.factor
  )) %>%
  rename(id = caseid) %>%
  mutate(
    # 1 for (profile A, choice 1) and (profile B, choice 2), 0 for the two
    # mixed combinations; any other combination becomes NA.
    # NOTE(review): presumably choice 1/2 means profile A/B was picked --
    # confirm against the survey codebook.
    selected = case_when(
      profile == "A" & choice == "1" ~ 1,
      profile == "A" & choice == "2" ~ 0,
      profile == "B" & choice == "1" ~ 0,
      profile == "B" & choice == "2" ~ 1
    ),
    # Numeric profile code: A -> 1, B -> 2 (anything else -> NA).
    profile = case_when(
      profile == "A" ~ 1,
      profile == "B" ~ 2
    ),
    # Numeric respondent-race indicator: White -> 0, Black -> 1; any other
    # value of `race` becomes NA.
    race_n = case_when(
      race == "White respondent" ~ 0,
      race == "Black respondent" ~ 1
    )
  )


# Display order of the nine conjoint attributes.
Attribute_order <- c(
  "Faces", "Background", "Identifies", "Language", "Education",
  "Profession", "Religion", "Political", "Spouse"
)

# Display order of every attribute level, flattened into a single unnamed
# character vector. The list labels below are only for readability; they are
# dropped by `use.names = FALSE`.
Level_order <- unlist(
  list(
    faces = c(
      "White man", "White woman", "Light man", "Light woman",
      "Medium man", "Medium woman", "Brown man", "Brown woman",
      "Black man", "Black woman"
    ),
    background = c(
      "Legal immigrant", "Undoc. immigrant", "Naturalized citizen",
      "US-born legal parents", "US-born undoc. parents",
      "US-born legal grandparents", "US-born undoc. grandparents"
    ),
    identifies = c(
      "American", "Latino", "Hispanic", "Mexican-American",
      "Mexican", "White", "Black"
    ),
    language = c(
      "Only English", "Bilingual", "Limited English", "No English"
    ),
    education = c(
      "High school dropout", "High school graduate",
      "Bachelor's degree", "Advanced/graduate degree"
    ),
    profession = c(
      "Waiter", "Janitor", "Sales manager", "Teacher",
      "IT professional", "Doctor", "Small business owner"
    ),
    religion = c(
      "Presbyterian", "Evangelical", "Catholic", "Not religious"
    ),
    political = c(
      "Democrat", "Republican", "Independent", "Not political"
    ),
    spouse = c(
      "Not married", "S. is American", "S. is Black", "S. is White",
      "S. is same as profile"
    )
  ),
  use.names = FALSE
)


# Number of distinct respondents (id) in each race_n group.
check_n <- count(distinct(df, id, race_n), race_n)

# Respondent counts for race_n == 0 and race_n == 1.
n0 <- pull(filter(check_n, race_n == 0), n)
n1 <- pull(filter(check_n, race_n == 1), n)

# Two-line group labels of the form "<group>\n(N = <count>)".
mod_labels <- c(
  paste0("White respondent", "\n(N = ", n0, ")"),
  paste0("Black respondent", "\n(N = ", n1, ")")
)

# Keep only the analysis columns (race_n is carried forward as `moderator`),
# turn factor columns back into plain character, and stack the nine
# attribute columns into long format: one row per attribute, with the
# attribute name in `attribute` and its value in `level`.
df <- df %>%
  select(
    id, task, profile, selected,
    Faces, Background, Identifies, Language, Education,
    Profession, Religion, Political, Spouse,
    moderator = race_n
  ) %>%
  mutate(across(where(is.factor), as.character)) %>%
  # After the select() above the attribute columns are contiguous, so the
  # range Faces:Spouse picks exactly those nine columns, in order.
  gather(key = "attribute", value = "level", Faces:Spouse)

# Respondent-level bootstrap sample.
# Respondent ids, in order of first appearance in df.
unique_id <- unique(df[["id"]])

n_draws <- 200
n_ids <- length(unique_id)

# Ids are sampled with replacement, n_ids * n_draws times in total. The draw
# labels 1..n_draws are recycled n_ids times, so every draw receives n_ids
# sampled ids. Joining back onto df attaches all rows of each sampled id
# (an id drawn several times contributes its rows several times).
df_bootstrapped <- data.frame(
  draws = rep(seq_len(n_draws), times = n_ids),
  id = sample(unique_id, size = n_ids * n_draws, replace = TRUE)
) %>%
  left_join(df, by = "id")

# Two-sided level used for the bootstrap percentile interval.
alpha <- 0.05

# Step 1: within each draw, mean of `selected` for every attribute level,
# separately by moderator group.
draw_level_means <- df_bootstrapped %>%
  group_by(draws, attribute, level, moderator) %>%
  summarise(average = mean(selected)) %>%
  ungroup()

# Step 2: salience of a level is the absolute distance of that mean from 0.5;
# salience of an attribute is the average over its levels.
draw_salience <- draw_level_means %>%
  mutate(salience = abs(average - 0.5)) %>%
  group_by(draws, moderator, attribute) %>%
  summarise(average_salience = mean(salience)) %>%
  ungroup()

# Step 3: one column per moderator value, then the between-group difference
# (moderator 1 minus moderator 0) for every draw and attribute.
draw_differences <- draw_salience %>%
  spread(moderator, average_salience) %>%
  mutate(diff = `1` - `0`)

# Step 4: summarise the differences across draws: mean, lower and upper
# percentile bounds, and a 0/1 flag `sig` that is 1 when both bounds lie
# strictly on the same side of zero.
results2b <- draw_differences %>%
  group_by(attribute) %>%
  summarise(
    difference_mean = mean(diff),
    difference_bs.low = quantile(diff, alpha / 2),
    difference_bs.high = quantile(diff, 1 - alpha / 2)
  ) %>%
  ungroup() %>%
  mutate(
    sig = as.numeric(
      (difference_bs.low > 0 & difference_bs.high > 0) |
        (difference_bs.low < 0 & difference_bs.high < 0)
    )
  )


# Point estimates from the full (non-resampled) data.
# Mean of `selected` for every attribute level, by moderator group.
level_means <- df %>%
  group_by(attribute, level, moderator) %>%
  summarise(average = mean(selected)) %>%
  ungroup()

# Attribute salience = average absolute deviation of the level means from
# 0.5, spread into one column per moderator value, with the bootstrap
# significance flag from results2b attached by attribute.
results2a <- level_means %>%
  mutate(deviation = abs(average - 0.5)) %>%
  group_by(attribute, moderator) %>%
  summarise(salience = mean(deviation)) %>%
  ungroup() %>%
  spread(moderator, salience) %>%
  left_join(select(results2b, attribute, sig), by = "attribute")


# Salience analysis by race (Figure 3) ----

# Left panel: attribute salience for moderator group 0 (x axis) against
# moderator group 1 (y axis), one labelled point per attribute, with a
# slope-1 reference line. Points and labels are black when sig == 1 and
# dark gray when sig == 0.
g2a <- ggplot(
  data = results2a,
  aes(x = `0`, y = `1`)
) +
  geom_abline(slope = 1, color = "gray80") +
  geom_point(aes(color = as.factor(sig))) +
  geom_text_repel(aes(label = attribute, color = as.factor(sig))) +
  coord_equal(
    ylim = c(0, 0.13),
    xlim = c(0, 0.13)
  ) +
  scale_x_continuous(breaks = c(0, 0.05, 0.10)) +
  scale_y_continuous(breaks = c(0, 0.05, 0.10)) +
  # Colours are named by factor level so the mapping stays correct even when
  # only one value of sig is present (an unnamed vector would hand the first
  # colour to whichever level happens to exist).
  scale_color_manual(values = c("0" = "darkgray", "1" = "black")) +
  # "none" replaces the deprecated `guides(color = FALSE)`.
  guides(color = "none") +
  theme_few() +
  labs(
    x = mod_labels[1],
    y = mod_labels[2]
  )

# Right panel: bootstrap mean difference in salience per attribute with its
# lower/upper bootstrap bounds, attributes ordered by the mean difference and
# drawn horizontally (coord_flip). A dashed line marks zero. Ranges are black
# when sig == 1 and dark gray when sig == 0.
g2b <- ggplot(data = results2b) +
  geom_hline(
    yintercept = 0,
    color = "gray70",
    linetype = "dashed"
  ) +
  geom_pointrange(
    aes(
      x = reorder(attribute, difference_mean),
      y = difference_mean,
      ymin = difference_bs.low,
      ymax = difference_bs.high,
      color = as.factor(sig)
    )
  ) +
  coord_flip() +
  # Colours are named by factor level so the mapping stays correct even when
  # only one value of sig is present (an unnamed vector would hand the first
  # colour to whichever level happens to exist).
  scale_color_manual(values = c("0" = "darkgray", "1" = "black")) +
  # "none" replaces the deprecated `guides(color = FALSE)`.
  guides(color = "none") +
  labs(
    y = "Difference in attribute salience",
    x = NULL
  ) +
  theme_few()

# Put the two panels side by side in a single row, horizontally aligned.
figure_3 <- plot_grid(g2a, g2b, nrow = 1, align = "h")

# Save all the results ----------------------------------------------------

ggsave(
  filename = "c_figures/figure_3.pdf",
  plot = figure_3,
  width = 12,
  height = 5,
  scale = 0.9
)

### Table A7 and A8 ----
# Both result tables are written as LaTeX, without row names.

print(
  xtable(results2a),
  type = "latex",
  file = "d_tables/table_A7.tex",
  include.rownames = FALSE
)

print(
  xtable(results2b),
  type = "latex",
  file = "d_tables/table_A8.tex",
  include.rownames = FALSE
)

#### Clear environment ----
# Removes every object listed by ls() in the environment this script runs in.
# NOTE(review): when the script is source()d into a shared session this also
# deletes objects the caller created -- confirm that is intended.

rm(list = ls())


# References ------------
### Code adapted from: 
### Clayton, Katherine, Jeremy Ferwerda, and Yusaku Horiuchi. 2021. 
### “Exposure to Immigration and Admission Preferences: Evidence from France.” 
### Political Behavior 43 (1): 175–200. https://doi.org/10.1007/s11109-019-09550-z.
